In [1]:
# For each region, we store in a database the shares of burden and of trials across the 28 diseases
# 1- Build the burden matrix by category
#################################################################################
library(foreach)
library(doParallel)
# GBD 2005
# Data as downloaded from GBD 2010 study, not included in the repository
gbd_dir <- '/media/igna/Elements/HotelDieu/Cochrane/GBD 2010/GBD_2005_2010_by_cause_country_level_GBD2010/2005'
gbds <- list.files(gbd_dir)
# Keep only the file names containing "csv" (case-insensitive)
gbds <- gbds[grepl('csv', gbds, ignore.case = TRUE)]
gbd_paths <- file.path(gbd_dir, gbds)
cl <- makeCluster(4)
registerDoParallel(cl)
t0 <- proc.time()
# Read the files on the 4 workers; each worker returns the filtered rows of one file
A <- foreach(k = gbd_paths) %dopar% {
  cause_tab <- read.csv(k)
  # Restrict to: all ages, both sexes; all metrics are kept (DALYs, deaths, YLL, YLD).
  # Columns are selected by position; per the original note they hold the total
  # number (without confidence interval) and the rate per inhabitants.
  keep_rows <- cause_tab$age_name == 'All ages' & cause_tab$sex == 'Both sexes'
  cause_tab[keep_rows, c(1:8, 11, 12, 18)]
}
stopCluster(cl)
# Stack the per-file results into a single data frame
DT <- do.call(rbind, A)
# Elapsed time, in minutes
(proc.time() - t0) / 60
# 1.5min
In [15]:
# Count the records whose level-4 cause is missing (NA)...
table(is.na(DT$causelevel4))
# ...and the records whose level-4 cause is the empty string.
table(DT$causelevel4=="")
Burden is evaluated at least at level 4
In [16]:
# Key identifying each record's (level-3, level-4) cause pair
lvl34_key <- paste(DT$causelevel3, DT$causelevel4)
# Distinct cause pairs present in the data
dis4 <- unique(lvl34_key)
# Diseases evaluated at level 4: pairs having at least one record whose
# level-5 cause is missing (NA or empty string)
isn <- dis4 %in% lvl34_key[is.na(DT$causelevel5) | DT$causelevel5 == ""]
table(isn)
All diseases are evaluated at level 4, and the 27-class grouping never needs level 5, so we drop the level-5 records.
In [20]:
# Recode missing level-5 causes as the empty string, then keep only the
# records that have no level-5 cause
DT$causelevel5 <- replace(DT$causelevel5, is.na(DT$causelevel5), "")
DT <- DT[DT$causelevel5 == "", ]
In [21]:
# Taxonomy of 27 categories
Mgbd <- read.table(file = "../Data/27_gbd_groups.txt")
In [25]:
# Re-check for missing (NA) level-4 causes now that DT only holds records without a level-5 cause.
table(is.na(DT$causelevel4))
In [26]:
# Start with the groups of the taxonomy that are directly a level-4 cause
dt <- DT[is.element(DT$causelevel4, as.character(Mgbd$x)), ]
# List of diseases added: one row per distinct cause path (levels 1 to 4)
cause_path <- paste(dt$causelevel1, dt$causelevel2, dt$causelevel3, dt$causelevel4)
ML <- dt[!duplicated(cause_path), ]
# How many taxonomy entries were found among the level-4 causes
table(is.element(Mgbd$x, as.character(dt$causelevel4)))
#FALSE TRUE
# 15 13
In [28]:
# The other groups have to be built by hand: list the taxonomy entries
# that are not a level-4 cause already present in dt
Mgbd$x[is.na(match(Mgbd$x, as.character(dt$causelevel4)))]
In [29]:
# Helpers shared by every manually built group below.

# Collapse the cause rows in `rows` to a single row per (country_name, measure)
# pair. The returned rows are the first occurrence of each pair, sorted by the
# pair key, with nm_mean and rt_mean replaced by their sums over the pair and
# causelevel4 set to `label`. All other columns keep the values of the first
# occurrence.
collapse_burden <- function(rows, label) {
  key <- paste(rows$country_name, rows$measure)
  out <- rows[!duplicated(key), ]
  out_key <- paste(out$country_name, out$measure)
  ord <- order(out_key)
  out <- out[ord, ]
  out_key <- out_key[ord]
  # tapply() names its result by group key: index it by name so that each sum
  # lands on its own row instead of relying on the two orderings coinciding.
  out$nm_mean <- as.vector(tapply(rows$nm_mean, key, sum)[out_key])
  out$rt_mean <- as.vector(tapply(rows$rt_mean, key, sum)[out_key])
  out$causelevel4 <- label
  out
}

# One row of `rows` per distinct cause path (levels 1 to 4); used to record in
# ML which causes went into a group.
distinct_causes <- function(rows) {
  path <- paste(rows$causelevel1, rows$causelevel2, rows$causelevel3, rows$causelevel4)
  rows[!duplicated(path), ]
}

#Maternal disorders
Aj <- DT[DT$causelevel3 %in% c("Maternal disorders"), ]
Mttp <- collapse_burden(Aj, "")
dt <- rbind(dt, Mttp)
ML <- rbind(ML, distinct_causes(Aj))
In [31]:
#Neonatal disorders
Aj <- DT[DT$causelevel3 %in% c("Neonatal disorders"), ]
Mttp <- collapse_burden(Aj, "")
dt <- rbind(dt, Mttp)
ML <- rbind(ML, distinct_causes(Aj))
In [32]:
#Nutritional deficiencies
Aj <- DT[DT$causelevel3 %in% c("Nutritional deficiencies"), ]
Mttp <- collapse_burden(Aj, "")
dt <- rbind(dt, Mttp)
ML <- rbind(ML, distinct_causes(Aj))
In [33]:
#Neoplasms
Aj <- DT[DT$causelevel3 %in% c("Neoplasms"), ]
Mttp <- collapse_burden(Aj, "")
dt <- rbind(dt, Mttp)
ML <- rbind(ML, distinct_causes(Aj))
In [34]:
#Cardiovascular and circulatory diseases
Aj <- DT[DT$causelevel3 %in% c("Cardiovascular and circulatory diseases"), ]
Mttp <- collapse_burden(Aj, "")
dt <- rbind(dt, Mttp)
ML <- rbind(ML, distinct_causes(Aj))
In [35]:
#Chronic respiratory diseases
Aj <- DT[DT$causelevel3 %in% c("Chronic respiratory diseases"), ]
Mttp <- collapse_burden(Aj, "")
dt <- rbind(dt, Mttp)
ML <- rbind(ML, distinct_causes(Aj))
In [36]:
#Cirrhosis of the liver
Aj <- DT[DT$causelevel3 %in% c("Cirrhosis of the liver"), ]
Mttp <- collapse_burden(Aj, "")
dt <- rbind(dt, Mttp)
ML <- rbind(ML, distinct_causes(Aj))
In [38]:
#Digestive diseases (except cirrhosis)
Aj <- DT[DT$causelevel3 %in% c("Digestive diseases (except cirrhosis)"), ]
Mttp <- collapse_burden(Aj, "")
dt <- rbind(dt, Mttp)
ML <- rbind(ML, distinct_causes(Aj))
In [39]:
#Neurological disorders
Aj <- DT[DT$causelevel3 %in% c("Neurological disorders"), ]
Mttp <- collapse_burden(Aj, "")
dt <- rbind(dt, Mttp)
ML <- rbind(ML, distinct_causes(Aj))
In [40]:
#Mental and behavioral disorders
Aj <- DT[DT$causelevel3 %in% c("Mental and behavioral disorders"), ]
Mttp <- collapse_burden(Aj, "")
dt <- rbind(dt, Mttp)
ML <- rbind(ML, distinct_causes(Aj))
In [41]:
#Musculoskeletal disorders
Aj <- DT[DT$causelevel3 %in% c("Musculoskeletal disorders"), ]
Mttp <- collapse_burden(Aj, "")
dt <- rbind(dt, Mttp)
ML <- rbind(ML, distinct_causes(Aj))
In [42]:
#Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases
Aj <- DT[DT$causelevel3 %in% c("Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases"), ]
Mttp <- collapse_burden(Aj, "")
dt <- rbind(dt, Mttp)
ML <- rbind(ML, distinct_causes(Aj))
In [43]:
#Neglected tropical diseases excluding malaria
# Malaria is left out of this group: only the other level-4 causes are summed
Aj <- DT[DT$causelevel3 %in% c("Neglected tropical diseases and malaria") & !DT$causelevel4 %in% c("Malaria"), ]
Mttp <- collapse_burden(Aj, "Neglected tropical diseases excluding malaria")
dt <- rbind(dt, Mttp)
ML <- rbind(ML, distinct_causes(Aj))
In [44]:
#Diabetes, urinary diseases and male infertility
# This group is assembled from four level-4 causes
Aj <- DT[DT$causelevel4 %in% c("Diabetes mellitus", "Acute glomerulonephritis", "Urinary diseases and male infertility", "Chronic kidney diseases"), ]
Mttp <- collapse_burden(Aj, "Diabetes, urinary diseases and male infertility")
dt <- rbind(dt, Mttp)
ML <- rbind(ML, distinct_causes(Aj))
In [46]:
# Sanity check of the grouping: count how many names of Mgbd$x are found among
# the deepest filled cause level of the rows in dt (level 2 where level 3 is "",
# level 3 where level 4 is "", level 4 where level 5 is "").
table(Mgbd$x%in%c(as.character(dt$causelevel2[dt$causelevel3==""]),as.character(dt$causelevel3[dt$causelevel4==""]),as.character(dt$causelevel4[dt$causelevel5==""])))
# Same comparison with the conditions inverted (the next level is not "").
table(Mgbd$x%in%c(as.character(dt$causelevel2[dt$causelevel3!=""]),as.character(dt$causelevel3[dt$causelevel4!=""]),as.character(dt$causelevel4[dt$causelevel5!=""])))
# Distinct cause paths (levels 1 to 5) present in dt, and how many there are.
U <- unique(paste(dt$causelevel1,dt$causelevel2,dt$causelevel3,dt$causelevel4,dt$causelevel5))
length(U)
Ok, we have the 27 groups of diseases
In [50]:
#Diseases not included
# Count the level-3/level-4 pairs of dis4 that are / are not listed in ML...
table(dis4%in%paste(ML$causelevel3,ML$causelevel4))
# ...and print, sorted, the pairs that are not listed.
sort(dis4[!dis4%in%paste(ML$causelevel3,ML$causelevel4)])
Not included in the 27-class grouping:
In particular, these two residual categories are excluded because they are not part of the classifier's target space (its set of output classes): they are too complicated to classify.
In [53]:
#We add excluded categories to dt to evaluate what amount of burden we are excluding
# Every row currently in dt is flagged TRUE; the excluded categories appended
# afterwards are given cats27 = FALSE.
dt$cats27 <- TRUE
In [54]:
#Injuries
Aj <- DT[DT$causelevel2%in%c("Injuries"),]
Mttp <- Aj[!duplicated(paste(Aj$country_name,Aj$measure)),]
Mttp <- Mttp[order(paste(Mttp$country_name,Mttp$measure)),]
Mttp$nm_mean <- tapply(Aj$nm_mean,paste(Aj$country_name,Aj$measure),sum)
Mttp$rt_mean <- tapply(Aj$rt_mean,paste(Aj$country_name,Aj$measure),sum)
Mttp$causelevel4 <- ""
Mttp$causelevel3 <- ""
Mttp$cats27 <- FALSE
dt <- rbind(dt,Mttp)
ML <- rbind(ML,Aj[!duplicated(paste(Aj$causelevel1,Aj$causelevel2,Aj$causelevel3,Aj$causelevel4)),])
In [55]:
#Residual categories
Aj <- DT[DT$causelevel4%in%c("Other endocrine, nutritional, blood, and immune disorders","Other infectious diseases"),]
Mttp <- Aj[!duplicated(paste(Aj$country_name,Aj$measure)),]
Mttp <- Mttp[order(paste(Mttp$country_name,Mttp$measure)),]
Mttp$nm_mean <- tapply(Aj$nm_mean,paste(Aj$country_name,Aj$measure),sum)
Mttp$rt_mean <- tapply(Aj$rt_mean,paste(Aj$country_name,Aj$measure),sum)
Mttp$causelevel4 <- "Excluded residual categories"
Mttp$cats27 <- FALSE
dt <- rbind(dt,Mttp)
ML <- rbind(ML,Aj[!duplicated(paste(Aj$causelevel1,Aj$causelevel2,Aj$causelevel3,Aj$causelevel4)),])
In [56]:
# Re-run the coverage check: are all level-3/level-4 pairs of dis4 now listed in ML?
table(dis4%in%paste(ML$causelevel3,ML$causelevel4))
Ok, we included all the burden
In [57]:
#We include variable Diseases = last causelevel without ""
dt$Disease <- as.character(dt$causelevel4)
dt$Disease[dt$Disease==""] <- as.character(dt$causelevel3[dt$Disease==""])
dt$Disease[dt$Disease==""] <- as.character(dt$causelevel2[dt$Disease==""])
In [58]:
# Count the rows of dt whose Disease is / is not one of the names in Mgbd$x.
table(dt$Disease%in%as.character(Mgbd$x))
# Number of distinct diseases. NOTE(review): dt$Dis relies on `$` partial
# matching and presumably resolves to dt$Disease - confirm no other column
# name starts with "Dis".
length(unique(dt$Dis))
Ok: 27 + injuries and residual
In [59]:
# Super-region starts as a copy of the region, held as a factor.
# as.factor() leaves an existing factor untouched and converts a character
# column (what read.csv() returns since R 4.0.0) into a factor with sorted
# levels. Without it, `levels<-` below would only attach an attribute to a
# character vector and leave every region name unchanged.
dt$Sup_region <- as.factor(dt$region)
# Print the levels: the recoding below is positional and follows this order
levels(dt$Sup_region)
In [60]:
# The mapping lists one super-region per region level; a different number of
# levels means the positions no longer line up, so stop rather than mislabel.
stopifnot(nlevels(dt$Sup_region) == 21L)
# Rename each region level to its super-region; levels given the same name
# are merged by `levels<-`.
levels(dt$Sup_region) <- c(
"Latin America and Caribbean", "High-income",
"Latin America and Caribbean", "Central Europe, Eastern Europe, and Central Asia",
"Central Europe, Eastern Europe, and Central Asia", "Latin America and Caribbean",
"Sub-Saharian Africa", "Southeast Asia, East Asia and Oceania",
"Central Europe, Eastern Europe, and Central Asia", "Sub-Saharian Africa",
"High-income", "High-income",
"North Africa and Middle East", "Southeast Asia, East Asia and Oceania",
"South Asia", "Southeast Asia, East Asia and Oceania",
"High-income", "Sub-Saharian Africa",
"Latin America and Caribbean", "High-income",
"Sub-Saharian Africa")
#Region level
GBD <- dt
# NOTE(review): the three vectors below are not used in the rest of this cell;
# they are kept unchanged in case other code reads them. They rely on `$`
# partial matching of column names.
meas <- levels(GBD$mes)
dis <- levels(GBD$Dis)
reg <- levels(GBD$Sup_reg)
# Total burden (nm_mean) per measure x super-region x disease. The three parts
# are joined with "&" into one key (GBD$meas relies on `$` partial matching).
S <- tapply(GBD$nm_mean,paste(GBD$meas,GBD$Sup_region,GBD$Disease,sep="&"),sum)
# Split each key back into its three parts, one row per key
L <- strsplit(names(S),"&")
M <- data.frame(do.call('rbind',L))
names(M) <- c("metr","Region","Disease")
M$burden <- as.numeric(S)
In [61]:
# Save the burden per measure, super-region and disease
write.table(x = M, file = "../Data/DALY_YLL_deaths_per_region_and_27_and_excluded_diseases_2005.txt")
In [ ]: